Paths to modify

# Machine-specific locations; adjust them before running the analysis.
# NOTE(review): the folder is spelled "CDSwordseg_Pipeline" in one path and
# "CDSwordSeg_Pipeline" in the other -- confirm on case-sensitive file systems.
path_to_CDI <- "/Users/elinlarsen/GoogleDrive/PhD_elin/Projets/CDSwordseg_Pipeline/CDI/english/"
res <- "/Users/elinlarsen/GoogleDrive/PhD_elin/Projets/CDSwordSeg_Pipeline/results/"
path_to_figures <- "/Users/elinlarsen/GoogleDrive/PhD_elin/our_papers/CogSci2018/figures/mai2018/"

# Per-corpus result folders (depends on the data architecture).
# `res` already ends with "/", so the joined paths contain a double slash.
res_brent <- paste0(res, "/", "Brent/full_corpus")
res_providence <- paste0(res, "/", "Providence/full_corpus")
res_buckeye <- paste0(res, "/", "buckeye")

For quick reproduction: skip this section and go to the chunk `load cdi-algo-data`.

Otherwise, clean the environment, modify the paths above, and clean each script.

### ALGOS parameters

# Algorithm labels and the two unit labels they are crossed with.
algos <- c(
  "tp/relativeforward", "tp/absoluteforward",
  "tp/relativebackward", "tp/absolutebackward",
  "dibs", "puddle", "ag"
)
unit <- c("phoneme", "syllable")

# "<algo>/<unit>" labels: all syllable variants first, then all phoneme ones.
Asyll <- paste0(algos, "/", "syllable")
Aph <- paste0(algos, "/", "phoneme")
AU <- c(Asyll, Aph)

# Same labels with "gold" appended at the end.
AU_g <- list.append(AU, "gold")

Data processing

# Gold word counts for Brent, read with the project helper read_gold()
# (defined outside this view); the tibble printed below has `count` and `Type`.
brent <- read_gold(res_brent, "brent")
## # A tibble: 6,366 x 2
##    count Type 
##    <int> <chr>
##  1 24727 you  
##  2 11233 the  
##  3  7721 a    
##  4  7518 it   
##  5  6323 that 
##  6  6048 your 
##  7  5552 are  
##  8  5519 no   
##  9  5439 i    
## 10  5154 and  
## # ... with 6,356 more rows
# Gold word counts for Providence, read with the project helper read_gold()
# (defined outside this view).
providence <- read_gold(res_providence, "providence")
## # A tibble: 6,589 x 2
##    count Type 
##    <int> <chr>
##  1 11003 the  
##  2 10552 you  
##  3  7233 a    
##  4  5038 and  
##  5  4456 it   
##  6  4217 i    
##  7  4179 to   
##  8  3969 that 
##  9  3663 are  
## 10  3527 m@l  
## # ... with 6,579 more rows
# Gold word counts for Buckeye, read with the project helper read_gold()
# (defined outside this view).
buckeye <- read_gold(res_buckeye, "buckeye")
## # A tibble: 9,189 x 2
##    count Type 
##    <int> <chr>
##  1 12391 i    
##  2 10903 and  
##  3  8314 the  
##  4  8023 you  
##  5  6393 to   
##  6  6170 know 
##  7  6088 a    
##  8  5342 that 
##  9  4595 it   
## 10  4059 like 
## # ... with 9,179 more rows
# Algorithm outputs per corpus, read with the project helper
# read_algorithms_results() (defined outside this view). The "## [1] ..."
# lines are the values echoed when the report was rendered.
freq_algos_brent <- read_algorithms_results(res_brent, algos, res_brent, "brent")
## [1] 6366
freq_algos_providence <- read_algorithms_results(res_providence, algos, res_providence, "providence")
## [1] 6589
freq_algos_buckeye <- read_algorithms_results(res_buckeye, algos, res_buckeye, "buckeye")
## [1] 9189

# Stack gold and algorithm tables of every corpus, then expose `Type` under
# the name `uni_lemma` (new column appended, old one dropped).
freq_all <- dplyr::bind_rows(
  brent, freq_algos_brent,
  providence, freq_algos_providence,
  buckeye, freq_algos_buckeye
)
freq_all <- mutate(freq_all, uni_lemma = Type)
freq_all <- select(freq_all, -Type)

#knitr::kable(head(freq_all), format = "html")%>%
#kable_styling(bootstrap_options = c("striped", "hover"))

Check number of words per algos

# For each algorithm/unit label (and gold), print how many distinct
# (uni_lemma, algos) pairs the Brent rows of freq_all contain.
for (a in AU_g) {
  X <- n_groups(
    group_by(
      filter(freq_all, au == a, corpus == "brent"),
      uni_lemma, algos
    )
  )
  print(paste0(a, ": ", X))
}
## [1] "tp/relativeforward/syllable: 6366"
## [1] "tp/absoluteforward/syllable: 6366"
## [1] "tp/relativebackward/syllable: 6366"
## [1] "tp/absolutebackward/syllable: 6366"
## [1] "dibs/syllable: 6366"
## [1] "puddle/syllable: 6366"
## [1] "ag/syllable: 6371"
## [1] "tp/relativeforward/phoneme: 6366"
## [1] "tp/absoluteforward/phoneme: 6366"
## [1] "tp/relativebackward/phoneme: 6366"
## [1] "tp/absolutebackward/phoneme: 6366"
## [1] "dibs/phoneme: 6366"
## [1] "puddle/phoneme: 6366"
## [1] "ag/phoneme: 6374"
## [1] "gold: 6366"

Stemming : -> FOR LATER ANALYSIS

Not working for now

#freq_all$lemma=stem(freq_all$Type, "english") 

last version of CDI from Braginsky 2018

# Restore the objects saved in uni_prop_data.RData into the global environment
# (the code below relies on it providing `uni_prop_data`).
load(
  paste0("/Users/elinlarsen/GoogleDrive/PhD_elin/Projets/CDSwordseg_Pipeline/CDI/", "/", "uni_prop_data.RData"),
  envir = .GlobalEnv
)

# Keep the American English rows only.
new_prop <- filter(uni_prop_data, language == "English (American)")

#linguistics<-d_prop%>%
  #mutate(uni_lemma=Type, measure=form)%>%
  #select(-prop)
  
#linguistics$measure[linguistics$measure=="WG_production"]="produces"
#linguistics$measure[linguistics$measure=="WS_production"]="produces"
#linguistics$measure[linguistics$measure=="WG_comprehension"]="understands"

#new_prop=merge(new_prop_eng, linguistics, by=c("uni_lemma", "age", "measure"))
head(new_prop)
## # A tibble: 6 x 8
## # Groups:   language, measure, uni_lemma [1]
##   language     measure uni_lemma   age num_true num_false    prop items   
##   <chr>        <chr>   <chr>     <int>    <int>     <int>   <dbl> <list>  
## 1 English (Am… produc… airplane      8        0        35 0       <tibble…
## 2 English (Am… produc… airplane      9        0        94 0       <tibble…
## 3 English (Am… produc… airplane     10        1       154 0.00645 <tibble…
## 4 English (Am… produc… airplane     11        3        91 0.0319  <tibble…
## 5 English (Am… produc… airplane     12        3       140 0.0210  <tibble…
## 6 English (Am… produc… airplane     13       31       739 0.0403  <tibble…

Number of uni_lemma types

# Number of distinct uni_lemma values in the English CDI table.
n_groups(group_by(new_prop, uni_lemma))
## [1] 395

Merge cdi data and algo data by TYPE

# Join CDI proportions with the frequency table on their shared columns, then
# drop the CDI bookkeeping columns.
new <- select(merge(new_prop, freq_all), -items, -num_true, -num_false)

# Log-transformed predictors; +1 keeps zero counts finite.
new[["log_freq"]] <- log(new[["freq_smoothed"]])
new[["log_count"]] <- log(new[["count"]] + 1)

# HTML preview of the first rows.
kable_styling(
  knitr::kable(head(new), format = "html"),
  bootstrap_options = c("striped", "hover")
)
uni_lemma language measure age prop count count_gold unit algos freq_smoothed corpus au log_freq log_count
airplane English (American) understands 12 0.2097902 0 8 phoneme puddle 3.70e-06 buckeye puddle/phoneme -12.504324 0.000000
airplane English (American) understands 12 0.2097902 2 8 syllable tp/absolutebackward 2.00e-05 providence tp/absolutebackward/syllable -10.820771 1.098612
airplane English (American) understands 12 0.2097902 0 8 syllable dibs 3.70e-06 providence dibs/syllable -12.513851 0.000000
airplane English (American) understands 12 0.2097902 0 47 phoneme tp/relativeforward 3.50e-06 brent tp/relativeforward/phoneme -12.565166 0.000000
airplane English (American) understands 12 0.2097902 8 8 gold 2.88e-05 providence gold -10.454704 2.197225
airplane English (American) understands 12 0.2097902 5 8 syllable tp/relativeforward 8.57e-05 providence tp/relativeforward/syllable -9.364334 1.791759

Save this temporary data :

#save(new, file = paste(res, "/braginsky_cdi_algo_combined.RData",sep=""))

Load data frame containing word segmentation algorithm results and CDI data CLEAN

#load(file = paste(res, "/braginsky_cdi_algo_combined.RData",sep=""))

Check number of CDI words per algos

# For each algorithm/unit label (and gold), print how many distinct
# (uni_lemma, algos) pairs remain in the merged table `new`.
for (a in AU_g) {
  X <- n_groups(
    group_by(
      filter(new, au == a),
      uni_lemma, algos
    )
  )
  print(paste0(a, ": ", X))
}
## [1] "tp/relativeforward/syllable: 336"
## [1] "tp/absoluteforward/syllable: 336"
## [1] "tp/relativebackward/syllable: 336"
## [1] "tp/absolutebackward/syllable: 336"
## [1] "dibs/syllable: 336"
## [1] "puddle/syllable: 336"
## [1] "ag/syllable: 336"
## [1] "tp/relativeforward/phoneme: 336"
## [1] "tp/absoluteforward/phoneme: 336"
## [1] "tp/relativebackward/phoneme: 336"
## [1] "tp/absolutebackward/phoneme: 336"
## [1] "dibs/phoneme: 336"
## [1] "puddle/phoneme: 336"
## [1] "ag/phoneme: 336"
## [1] "gold: 336"

Model parameters

# Age ranges used for the comprehension and production measures.
AGES_comp <- 8:18
AGES_prod <- 8:30
CORPUS <- c("brent", "providence", "buckeye")
MEASURE <- c("understands")

# Pick the age range matching the measure.
if (MEASURE == "produces") {
  AGES <- AGES_prod
} else {
  # Parenthesised so the chosen range is echoed, as in the rendered output.
  (AGES <- AGES_comp)
}
##  [1]  8  9 10 11 12 13 14 15 16 17 18
AU_g <- list.append(AU, "gold")
G <- c("gold")
DATA <- new

Linear models

by corpus-measure-algo-unit-age

#' Fit `prop ~ log(freq_smoothed)` separately for every
#' corpus x measure x age x algorithm-unit cell and collect each R-squared.
#'
#' @param DATA Data frame with (at least) the columns `au`, `age`, `corpus`,
#'   `measure`, `uni_lemma`, `prop` and `freq_smoothed`.
#' @param AGES Vector of ages to iterate over.
#' @param AG_g Character vector of algorithm/unit labels such as
#'   "tp/relativeforward/syllable" or "gold". The historical parameter name is
#'   kept; it used to be ignored in favour of a global `AU_g`.
#' @param CORPUS Character vector of corpus names.
#' @param MEASURE Character vector of measures.
#' @return A tibble with one row per cell and the columns `age`, `corpus`,
#'   `measure`, `au`, `algo`, `unit`, `R2` and `nbtokens` (number of distinct
#'   `uni_lemma` in the cell). Rows are ordered corpus > measure > age > au.
#'   `R2` is NA for cells that hold no usable observation.
R2_by_parameters=function(DATA, AGES, AG_g, CORPUS, MEASURE)
{
  au_labels <- as.character(AG_g)

  # expand.grid varies its first argument fastest, which reproduces the
  # nesting corpus (outer) > measure > age > au (inner).
  cells <- expand.grid(
    au = au_labels, age = AGES, measure = MEASURE, corpus = CORPUS,
    stringsAsFactors = FALSE, KEEP.OUT.ATTRS = FALSE
  )
  n_cells <- nrow(cells)

  # Split "<algo...>/<unit>" labels; everything before the last "/" is the
  # algorithm, whatever its number of path segments.
  parts <- strsplit(cells$au, "/", fixed = TRUE)
  algo <- vapply(parts, function(p) paste(head(p, -1), collapse = "/"), character(1))
  unit <- vapply(parts, function(p) if (length(p) > 0) p[length(p)] else "", character(1))
  is_gold <- cells$au == "gold"
  algo[is_gold] <- "gold"
  unit[is_gold] <- " "

  r2 <- rep(NA_real_, n_cells)
  nb_tokens <- integer(n_cells)
  for (i in seq_len(n_cells)) {
    keep <- DATA$au %in% cells$au[i] &
      DATA$age %in% cells$age[i] &
      DATA$corpus %in% cells$corpus[i] &
      DATA$measure %in% cells$measure[i]
    d <- DATA[keep, , drop = FALSE]
    nb_tokens[i] <- length(unique(d$uni_lemma))

    # lm() stops on a cell without complete observations; leave R2 as NA there
    # instead of aborting the whole sweep.
    if (any(!is.na(d$prop) & !is.na(d$freq_smoothed))) {
      model <- lm(prop ~ log(freq_smoothed), data = d)
      r2[i] <- summary(model)$r.squared
    }
  }

  df <- data.frame(
    age = cells$age, corpus = cells$corpus, measure = cells$measure,
    au = cells$au, algo = algo, unit = unit, R2 = r2, nbtokens = nb_tokens,
    stringsAsFactors = FALSE
  )
  return(as_tibble(df))
}
# The label vector is passed explicitly as AU_g; the previous calls passed an
# undefined `AG_g`, which only worked because the argument was never evaluated.
R2_comp=R2_by_parameters(DATA, seq(8,18), AU_g, CORPUS, "understands")
R2_prod=R2_by_parameters(DATA, seq(8,30), AU_g, CORPUS, "produces")

Saving results

#write.table(R2_comp, paste(res, "/R2_eng_all_corpus_comprehension_BraginskyCDIData.csv", sep=""), na = "NA", append = FALSE, col.names = TRUE, sep="\t", row.names = FALSE)

#write.table(R2_prod, paste(res, "/R2_eng_all_corpus_production_BraginskyCDIData.csv", sep=""), na = "NA", append = FALSE, col.names = TRUE, sep="\t", row.names = FALSE)

Visualising results

# R2 by age for gold and three syllable-based algorithms, one panel per corpus
# (comprehension).
ggplot(
  filter(
    R2_comp,
    au %in% c("gold", "tp/relativeforward/syllable",
              "tp/relativebackward/syllable", "ag/syllable")
  ),
  aes(as.factor(age), R2, colour = au, shape = corpus)
) +
  geom_point(size = 4, alpha = 0.9) +
  facet_grid(. ~ corpus) +
  theme_bw(base_size = 22)

# Same plot for production.
ggplot(
  filter(
    R2_prod,
    au %in% c("gold", "tp/relativeforward/syllable",
              "tp/relativebackward/syllable", "ag/syllable")
  ),
  aes(as.factor(age), R2, colour = au, shape = corpus)
) +
  geom_point(size = 4, alpha = 0.9) +
  facet_grid(~ corpus) +
  theme_bw(base_size = 22)

LM

Let’s check results from IS paper : corpus==brent, measure==understands, age==13

# Brent corpus, comprehension, 13-month-olds.
AGE <- 13
MEASURE <- "understands"
CORPUS <- "brent"
sub <- filter(DATA, age == AGE, measure == MEASURE, corpus == CORPUS)
# "gold" is the reference level, so every au interaction is a contrast to gold.
sub$au <- relevel(factor(sub$au), "gold")

# Common log-count slope plus one slope adjustment per algorithm/unit.
m0 <- lm(prop ~ log(count + 1) + log(count + 1):au, data = sub)
summary(m0)
## 
## Call:
## lm(formula = prop ~ log(count + 1) + log(count + 1):au, data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37385 -0.16064 -0.05532  0.12582  0.76014 
## 
## Coefficients:
##                                                 Estimate Std. Error
## (Intercept)                                    2.152e-01  5.035e-03
## log(count + 1)                                 1.943e-02  2.533e-03
## log(count + 1):auag/phoneme                   -6.145e-05  3.346e-03
## log(count + 1):auag/syllable                   6.909e-03  3.719e-03
## log(count + 1):audibs/phoneme                  2.191e-03  4.362e-03
## log(count + 1):audibs/syllable                -2.449e-03  3.549e-03
## log(count + 1):aupuddle/phoneme               -5.878e-04  3.495e-03
## log(count + 1):aupuddle/syllable               1.841e-03  3.510e-03
## log(count + 1):autp/absolutebackward/phoneme  -4.012e-05  4.375e-03
## log(count + 1):autp/absolutebackward/syllable  4.545e-03  3.620e-03
## log(count + 1):autp/absoluteforward/phoneme    4.215e-03  4.163e-03
## log(count + 1):autp/absoluteforward/syllable   3.017e-03  3.450e-03
## log(count + 1):autp/relativebackward/phoneme   2.847e-03  3.932e-03
## log(count + 1):autp/relativebackward/syllable  1.105e-02  4.150e-03
## log(count + 1):autp/relativeforward/phoneme    1.922e-03  3.813e-03
## log(count + 1):autp/relativeforward/syllable   6.490e-03  3.700e-03
##                                               t value Pr(>|t|)    
## (Intercept)                                    42.742  < 2e-16 ***
## log(count + 1)                                  7.672 2.03e-14 ***
## log(count + 1):auag/phoneme                    -0.018  0.98535    
## log(count + 1):auag/syllable                    1.858  0.06324 .  
## log(count + 1):audibs/phoneme                   0.502  0.61548    
## log(count + 1):audibs/syllable                 -0.690  0.49028    
## log(count + 1):aupuddle/phoneme                -0.168  0.86644    
## log(count + 1):aupuddle/syllable                0.524  0.60000    
## log(count + 1):autp/absolutebackward/phoneme   -0.009  0.99268    
## log(count + 1):autp/absolutebackward/syllable   1.256  0.20930    
## log(count + 1):autp/absoluteforward/phoneme     1.012  0.31144    
## log(count + 1):autp/absoluteforward/syllable    0.874  0.38189    
## log(count + 1):autp/relativebackward/phoneme    0.724  0.46903    
## log(count + 1):autp/relativebackward/syllable   2.662  0.00779 ** 
## log(count + 1):autp/relativeforward/phoneme     0.504  0.61415    
## log(count + 1):autp/relativeforward/syllable    1.754  0.07950 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2107 on 4919 degrees of freedom
## Multiple R-squared:  0.06188,    Adjusted R-squared:  0.05902 
## F-statistic: 21.63 on 15 and 4919 DF,  p-value: < 2.2e-16
# Coefficient plot of m0 (type = "std"), sorted by estimate, values shown.
plot_model(
  m0,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3,
  axis.lim = c(-0.5, 0.5)
)

For each algorithm, let’s compare its predictive power to that of the gold.

Only TP-RF and AG with syllable units, compared to the gold

# Same model as above, fitted only on the rows whose label is in `pair`.
pair <- c("tp/relativeforward/syllable", "gold", "ag/syllable")
m1 <- lm(
  prop ~ log(count + 1) + log(count + 1):au,
  data = sub,
  subset = c(au %in% pair)
)
summary(m1)
## 
## Call:
## lm(formula = prop ~ log(count + 1) + log(count + 1):au, data = sub, 
##     subset = c(au %in% pair))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41816 -0.13789 -0.03344  0.11441  0.62039 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  0.057267   0.016459   3.479
## log(count + 1)                               0.049477   0.003816  12.965
## log(count + 1):auag/syllable                 0.012499   0.003504   3.567
## log(count + 1):autp/relativeforward/syllable 0.012285   0.003490   3.520
##                                              Pr(>|t|)    
## (Intercept)                                  0.000525 ***
## log(count + 1)                                < 2e-16 ***
## log(count + 1):auag/syllable                 0.000378 ***
## log(count + 1):autp/relativeforward/syllable 0.000451 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.196 on 983 degrees of freedom
## Multiple R-squared:  0.1891, Adjusted R-squared:  0.1866 
## F-statistic: 76.42 on 3 and 983 DF,  p-value: < 2.2e-16
# Coefficient plot of m1 (type = "std"), sorted by estimate, values shown.
plot_model(
  m1,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3,
  axis.lim = c(0, 0.5)
)

Look at the evolution with age for all algorithms: only TP-RF syllable has (1) an effect > 0.1 that is (2) significant

# All ages for the current MEASURE and CORPUS; gold is the reference level.
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS)
sub$au <- relevel(factor(sub$au), "gold")

# log-count x age, plus an age-scaled slope adjustment per algorithm/unit.
m2 <- lm(prop ~ log(count + 1) * age + log(count + 1):au:age, data = sub)
summary(m2)
## 
## Call:
## lm(formula = prop ~ log(count + 1) * age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56112 -0.12793 -0.02549  0.10337  0.91611 
## 
## Coefficients:
##                                                     Estimate Std. Error
## (Intercept)                                       -3.905e-01  5.841e-03
## log(count + 1)                                    -2.228e-03  1.430e-03
## age                                                4.859e-02  4.368e-04
## log(count + 1):age                                 1.284e-03  1.163e-04
## log(count + 1):age:auag/phoneme                    2.415e-06  6.910e-05
## log(count + 1):age:auag/syllable                   4.042e-04  7.681e-05
## log(count + 1):age:audibs/phoneme                  1.054e-04  9.010e-05
## log(count + 1):age:audibs/syllable                -1.349e-04  7.330e-05
## log(count + 1):age:aupuddle/phoneme               -3.483e-05  7.219e-05
## log(count + 1):age:aupuddle/syllable               9.986e-05  7.250e-05
## log(count + 1):age:autp/absolutebackward/phoneme  -5.929e-05  9.036e-05
## log(count + 1):age:autp/absolutebackward/syllable  2.659e-04  7.477e-05
## log(count + 1):age:autp/absoluteforward/phoneme    2.132e-04  8.599e-05
## log(count + 1):age:autp/absoluteforward/syllable   1.974e-04  7.125e-05
## log(count + 1):age:autp/relativebackward/phoneme   1.616e-04  8.121e-05
## log(count + 1):age:autp/relativebackward/syllable  6.021e-04  8.572e-05
## log(count + 1):age:autp/relativeforward/phoneme    1.087e-04  7.875e-05
## log(count + 1):age:autp/relativeforward/syllable   4.075e-04  7.643e-05
##                                                   t value Pr(>|t|)    
## (Intercept)                                       -66.853  < 2e-16 ***
## log(count + 1)                                     -1.558 0.119323    
## age                                               111.236  < 2e-16 ***
## log(count + 1):age                                 11.038  < 2e-16 ***
## log(count + 1):age:auag/phoneme                     0.035 0.972116    
## log(count + 1):age:auag/syllable                    5.262 1.43e-07 ***
## log(count + 1):age:audibs/phoneme                   1.170 0.241975    
## log(count + 1):age:audibs/syllable                 -1.841 0.065667 .  
## log(count + 1):age:aupuddle/phoneme                -0.482 0.629472    
## log(count + 1):age:aupuddle/syllable                1.377 0.168407    
## log(count + 1):age:autp/absolutebackward/phoneme   -0.656 0.511731    
## log(count + 1):age:autp/absolutebackward/syllable   3.556 0.000377 ***
## log(count + 1):age:autp/absoluteforward/phoneme     2.479 0.013171 *  
## log(count + 1):age:autp/absoluteforward/syllable    2.770 0.005604 ** 
## log(count + 1):age:autp/relativebackward/phoneme    1.990 0.046620 *  
## log(count + 1):age:autp/relativebackward/syllable   7.024 2.18e-12 ***
## log(count + 1):age:autp/relativeforward/phoneme     1.380 0.167635    
## log(count + 1):age:autp/relativeforward/syllable    5.332 9.76e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1931 on 54267 degrees of freedom
## Multiple R-squared:  0.4469, Adjusted R-squared:  0.4467 
## F-statistic:  2579 on 17 and 54267 DF,  p-value: < 2.2e-16
# Coefficient plot of m2 (type = "std"), sorted by estimate, values shown.
plot_model(
  m2,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  axis.lim = c(0, 0.5)
)

#plotting interaction between age and algorithm-unit pair
#plot_model(m2, type = "int", terms = c("age", "au")) # not working !!!

Restriction to TP-RF syllable, AG syllable and gold

# Keep gold and the two syllable-based algorithms listed in `included`.
included <- c("tp/relativeforward/syllable", "gold", "ag/syllable")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
sub$au <- relevel(factor(sub$au), "gold")

# Age effect, age-scaled log-count slope, and its shift per algorithm/unit.
m2_tp <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
#m2_tp <- lm(prop ~ age + log():age+ log(freq_smoothed):au:age, sub) 
summary(m2_tp)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58230 -0.11345 -0.01771  0.09954  0.83078 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -3.978e-01  7.425e-03
## age                                               3.965e-02  6.407e-04
## age:log(count + 1)                                2.924e-03  8.023e-05
## age:log(count + 1):auag/syllable                  7.394e-04  7.366e-05
## age:log(count + 1):autp/relativeforward/syllable  7.550e-04  7.337e-05
##                                                  t value Pr(>|t|)    
## (Intercept)                                       -53.58   <2e-16 ***
## age                                                61.88   <2e-16 ***
## age:log(count + 1)                                 36.44   <2e-16 ***
## age:log(count + 1):auag/syllable                   10.04   <2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable   10.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1829 on 10852 degrees of freedom
## Multiple R-squared:  0.5043, Adjusted R-squared:  0.5041 
## F-statistic:  2760 on 4 and 10852 DF,  p-value: < 2.2e-16
# Coefficient plot of m2_tp (type = "std"), sorted by estimate, values shown.
plot_model(
  m2_tp,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  axis.lim = c(0, 1)
)

Weird fact: the p-value obtained with count is clearly not the same as the one obtained with freq_smoothed!

# Full interaction of log frequency, age (as a factor) and algorithm/unit,
# followed by a prediction plot over log_freq, au and age.
m <- lm(prop ~ log_freq * as.factor(age) * au, data = sub)
plot_model(
  m,
  type = "pred",
  terms = c("log_freq", "au", "age")
)

For each algorithm, let’s compare its predictive power to that of the gold, including its evolution with age

# The per-algorithm fit is commented out, so this loop currently only
# rebinds `included` on each pass.
for (a in AU) {
  included <- c(a, "gold")
  #fit2=lm(prop~ age+ log(count+1):age + log(count+1):au:age ,data=sub,subset=c(au %in% included))
  #print(summary(fit2))
}

Now corpus=“brent”, measure=“produces”, algos=‘TP-RF-syll’

# Brent corpus, production: gold vs. TP relative-forward on syllables.
MEASURE <- "produces"
CORPUS <- "brent"
included <- c("tp/relativeforward/syllable", "gold")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
# "gold" is the reference level of au.
sub$au <- relevel(factor(sub$au), "gold")

# Age effect, age-scaled log-count slope, and its shift for the algorithm.
m_b_prod <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
summary(m_b_prod)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49142 -0.10785  0.00057  0.10606  0.68676 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -4.837e-01  3.797e-03
## age                                               3.707e-02  2.481e-04
## age:log(count + 1)                                1.237e-03  3.747e-05
## age:log(count + 1):autp/relativeforward/syllable  3.805e-04  2.875e-05
##                                                  t value Pr(>|t|)    
## (Intercept)                                      -127.41   <2e-16 ***
## age                                               149.41   <2e-16 ***
## age:log(count + 1)                                 33.01   <2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable   13.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1538 on 15082 degrees of freedom
## Multiple R-squared:  0.7791, Adjusted R-squared:  0.779 
## F-statistic: 1.773e+04 on 3 and 15082 DF,  p-value: < 2.2e-16
# Coefficient plot of the Brent/production fit (type = "std").
plot_model(
  m_b_prod,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3
)

Now corpus=“providence”, measure=“understands”, algos=‘TP-RF-syll’

# Providence corpus, comprehension: gold vs. TP relative-forward on syllables.
MEASURE <- "understands"
CORPUS <- "providence"
included <- c("tp/relativeforward/syllable", "gold")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
# "gold" is the reference level of au.
sub$au <- relevel(factor(sub$au), "gold")

# NOTE: the object name m_b_prod is reused here although the measure is
# "understands" and the corpus is Providence.
m_b_prod <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
summary(m_b_prod)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51882 -0.12891 -0.02384  0.10682  0.86587 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -4.025e-01  9.642e-03
## age                                               4.797e-02  8.500e-04
## age:log(count + 1)                                1.342e-03  1.162e-04
## age:log(count + 1):autp/relativeforward/syllable  5.348e-04  9.435e-05
##                                                  t value Pr(>|t|)    
## (Intercept)                                      -41.746  < 2e-16 ***
## age                                               56.433  < 2e-16 ***
## age:log(count + 1)                                11.549  < 2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable   5.668  1.5e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1945 on 7278 degrees of freedom
## Multiple R-squared:  0.4401, Adjusted R-squared:  0.4399 
## F-statistic:  1907 on 3 and 7278 DF,  p-value: < 2.2e-16
# Coefficient plot of the Providence/comprehension fit (type = "std").
plot_model(
  m_b_prod,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3
)

Now corpus=“providence”, measure=“produces”, algos=‘TP-RF-syll’

# Providence corpus, production: gold vs. TP relative-forward on syllables.
MEASURE <- "produces"
CORPUS <- "providence"
included <- c("tp/relativeforward/syllable", "gold")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
# "gold" is the reference level of au.
sub$au <- relevel(factor(sub$au), "gold")

# Age effect, age-scaled log-count slope, and its shift for the algorithm.
m_b_prod <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
summary(m_b_prod)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53169 -0.11140  0.00001  0.10605  0.68784 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -4.843e-01  3.877e-03
## age                                               3.912e-02  2.557e-04
## age:log(count + 1)                                9.080e-04  4.354e-05
## age:log(count + 1):autp/relativeforward/syllable  4.151e-04  3.526e-05
##                                                  t value Pr(>|t|)    
## (Intercept)                                      -124.94   <2e-16 ***
## age                                               152.98   <2e-16 ***
## age:log(count + 1)                                 20.85   <2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable   11.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1576 on 15174 degrees of freedom
## Multiple R-squared:  0.7689, Adjusted R-squared:  0.7688 
## F-statistic: 1.683e+04 on 3 and 15174 DF,  p-value: < 2.2e-16
# Coefficient plot of the Providence/production fit (type = "std").
plot_model(
  m_b_prod,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3
)

Now corpus=“buckeye”, measure=“understands”, algos=‘TP-RF-syll’

# Buckeye corpus, comprehension: gold vs. TP relative-forward on syllables.
MEASURE <- "understands"
CORPUS <- "buckeye"
included <- c("tp/relativeforward/syllable", "gold")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
# "gold" is the reference level of au.
sub$au <- relevel(factor(sub$au), "gold")

# NOTE: the object name m_b_prod is reused here although the measure is
# "understands" and the corpus is Buckeye.
m_b_prod <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
summary(m_b_prod)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52574 -0.12719 -0.02325  0.10246  0.88614 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -0.3976710  0.0106862
## age                                               0.0559301  0.0008372
## age:log(count + 1)                               -0.0010435  0.0001012
## age:log(count + 1):autp/relativeforward/syllable -0.0004653  0.0001312
##                                                  t value Pr(>|t|)    
## (Intercept)                                      -37.213  < 2e-16 ***
## age                                               66.803  < 2e-16 ***
## age:log(count + 1)                               -10.307  < 2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable  -3.545 0.000396 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1925 on 5804 degrees of freedom
## Multiple R-squared:  0.4385, Adjusted R-squared:  0.4382 
## F-statistic:  1511 on 3 and 5804 DF,  p-value: < 2.2e-16
# Coefficient plot of the Buckeye/comprehension fit (type = "std").
plot_model(
  m_b_prod,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3
)

Now corpus=“buckeye”, measure=“produces”, algos=‘TP-RF-syll’

# Buckeye corpus, production: gold vs. TP relative-forward on syllables.
MEASURE <- "produces"
CORPUS <- "buckeye"
included <- c("tp/relativeforward/syllable", "gold")
sub <- filter(DATA, measure == MEASURE, corpus == CORPUS, au %in% included)
# "gold" is the reference level of au.
sub$au <- relevel(factor(sub$au), "gold")

# Age effect, age-scaled log-count slope, and its shift for the algorithm.
m_b_prod <- lm(prop ~ age + log(count + 1):age + log(count + 1):au:age, data = sub)
summary(m_b_prod)
## 
## Call:
## lm(formula = prop ~ age + log(count + 1):age + log(count + 1):au:age, 
##     data = sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51414 -0.11461 -0.00039  0.10707  0.71553 
## 
## Coefficients:
##                                                    Estimate Std. Error
## (Intercept)                                      -4.859e-01  4.357e-03
## age                                               4.458e-02  2.365e-04
## age:log(count + 1)                               -6.385e-04  3.857e-05
## age:log(count + 1):autp/relativeforward/syllable -2.264e-04  4.998e-05
##                                                  t value Pr(>|t|)    
## (Intercept)                                      -111.51  < 2e-16 ***
## age                                               188.48  < 2e-16 ***
## age:log(count + 1)                                -16.55  < 2e-16 ***
## age:log(count + 1):autp/relativeforward/syllable   -4.53 5.94e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1581 on 12092 degrees of freedom
## Multiple R-squared:  0.7649, Adjusted R-squared:  0.7648 
## F-statistic: 1.311e+04 on 3 and 12092 DF,  p-value: < 2.2e-16
# Coefficient plot of the Buckeye/production fit (type = "std").
plot_model(
  m_b_prod,
  type = "std",
  sort.est = TRUE,
  show.values = TRUE,
  value.offset = 0.3
)

Linear mixed effect model

One big model : corpus and measure included and interacting with algorithm-unit and log frequency

Let’s fix age

# 13-month-olds, gold vs. TP relative-forward on syllables, all corpora and
# both measures.
AGE <- 13
included <- c("gold", "tp/relativeforward/syllable")
x <- filter(DATA, age == AGE, au %in% included)

# Reference levels: gold, buckeye, understands.
x$au <- relevel(as.factor(x$au), "gold")
x$corpus <- relevel(as.factor(x$corpus), "buckeye")
x$measure <- relevel(as.factor(x$measure), "understands")

# Log-frequency slope varying by au x corpus x measure, with a random
# intercept and random log-frequency slope per uni_lemma.
m3 <- lmer(
  prop ~ log(freq_smoothed) + log(freq_smoothed):au:corpus:measure +
    (1 + log(freq_smoothed) | uni_lemma),
  data = x
)
#m3 <- lmer(prop ~ log(count+1)+ log(count+1):au:corpus:measure +(1 + log(count+1)|uni_lemma),  x)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop ~ log(freq_smoothed) + log(freq_smoothed):au:corpus:measure +  
##     (1 + log(freq_smoothed) | uni_lemma)
##    Data: x
## 
## REML criterion at convergence: -5441.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0434 -0.7246 -0.0130  0.6959  3.3538 
## 
## Random effects:
##  Groups    Name               Variance  Std.Dev. Corr
##  uni_lemma (Intercept)        1.987e-02 0.14095      
##            log(freq_smoothed) 1.254e-06 0.00112  1.00
##  Residual                     9.841e-03 0.09920      
## Number of obs: 3696, groups:  uni_lemma, 336
## 
## Fixed effects:
##                                                                                        Estimate
## (Intercept)                                                                           1.813e-01
## log(freq_smoothed)                                                                    1.766e-02
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                           -2.829e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands    -2.875e-02
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             -2.899e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands      -2.936e-02
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        -2.968e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands -3.017e-02
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                              -2.974e-03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces       -2.301e-03
## log(freq_smoothed):augold:corpusbrent:measureproduces                                -1.201e-03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces         -6.303e-04
## log(freq_smoothed):augold:corpusprovidence:measureproduces                           -7.107e-04
##                                                                                      Std. Error
## (Intercept)                                                                           1.733e-02
## log(freq_smoothed)                                                                    2.027e-03
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            9.888e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     9.826e-04
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              9.352e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       9.484e-04
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         9.308e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  9.463e-04
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               9.888e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        9.826e-04
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 9.352e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          9.484e-04
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            9.308e-04
##                                                                                              df
## (Intercept)                                                                           1.138e+03
## log(freq_smoothed)                                                                    3.536e+03
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            3.441e+03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     3.420e+03
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              3.349e+03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       3.346e+03
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         3.346e+03
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  3.342e+03
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               3.441e+03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        3.420e+03
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 3.349e+03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          3.346e+03
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            3.346e+03
##                                                                                      t value
## (Intercept)                                                                           10.459
## log(freq_smoothed)                                                                     8.711
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                           -28.616
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands    -29.257
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             -31.000
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands      -30.958
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        -31.885
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands -31.885
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               -3.008
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        -2.342
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 -1.285
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          -0.665
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            -0.763
##                                                                                      Pr(>|t|)
## (Intercept)                                                                           < 2e-16
## log(freq_smoothed)                                                                    < 2e-16
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     < 2e-16
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       < 2e-16
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  < 2e-16
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               0.00265
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        0.01923
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 0.19904
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          0.50633
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            0.44524
##                                                                                         
## (Intercept)                                                                          ***
## log(freq_smoothed)                                                                   ***
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                           ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands    ***
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands      ***
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands ***
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                              ** 
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces       *  
## log(freq_smoothed):augold:corpusbrent:measureproduces                                   
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces            
## log(freq_smoothed):augold:corpusprovidence:measureproduces                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
#plot_model(m3, type = "std", sort.est = TRUE, show.values = TRUE) # not working !!!!

Interaction between corpus, measure and algorithm by 18 months

# Settings for this fit: which CDI measure and which age to look at.
MEASURE <- "produces"
AGE <- 25
# Human-readable task name used in the plot title.
task <- if (MEASURE == "understands") "comprehension" else "production"

# Subset to the chosen age/measure and to the algorithm-units of interest.
included <- c("gold", "tp/relativeforward/syllable", "ag/syllable")
x <- DATA %>%
  filter(age == AGE, au %in% included, measure == MEASURE)

# Factors with gold / buckeye as reference levels.
x$au <- relevel(as.factor(x$au), ref = "gold")
x$corpus <- relevel(as.factor(x$corpus), ref = "buckeye")

# Mixed-model variant kept for reference:
# m <- lmer(prop ~ log_freq*as.factor(au)*as.factor(corpus) + (log(freq_smoothed)|uni_lemma) , x)
m <- lm(prop ~ log_count * au * corpus, data = x)

# Predicted values along log_count, grouped by algorithm-unit and corpus.
plot_model(
  m,
  type = "pred",
  terms = c("log_count", "au", "corpus"),
  title = paste("Predicted values for proportion of", AGE, "mo infants", task)
)

# Settings for this fit: which CDI measure and which age to look at.
MEASURE <- "understands"
AGE <- 16
# Human-readable task name used in the plot title.
task <- if (MEASURE == "understands") "comprehension" else "production"

# Subset to the chosen age/measure and to the algorithm-units of interest.
included <- c("gold", "tp/relativeforward/syllable", "ag/syllable")
x <- DATA %>%
  filter(age == AGE, au %in% included, measure == MEASURE)

# Factors with gold / buckeye as reference levels.
x$au <- relevel(as.factor(x$au), ref = "gold")
x$corpus <- relevel(as.factor(x$corpus), ref = "buckeye")

# Mixed-model variant kept for reference:
# m <- lmer(prop ~ log_freq*as.factor(au)*as.factor(corpus) + (log(freq_smoothed)|uni_lemma) , x)
m <- lm(prop ~ log_count * au * corpus, data = x)

# Predicted values along log_count, grouped by algorithm-unit and corpus.
plot_model(
  m,
  type = "pred",
  terms = c("log_count", "au", "corpus"),
  title = paste("Predicted values for proportion of", AGE, "mo infants", task)
)

Interesting, right? It seems (though I would love to test this properly and have the statistical power to do so) that the TP-RF syllable predictions are clearly separated for comprehension for the Brent and Providence corpora, but not at all for the Buckeye corpus. For production at 18 mo, the prediction differences are not so clear, so let’s look at the evolution of these predictions across ages.

With age as a main effect, on which the slopes depend. The thing is: the comprehension measure is only available from 8 to 18 mo, while the production one is available from 8 to 30 mo.

# All ages, restricted to the two algorithm-units being compared.
included <- c("gold", "tp/relativeforward/syllable")
x <- DATA %>%
  filter(au %in% included)

# Turn the grouping columns into factors and pick their reference levels.
x$au <- relevel(as.factor(x$au), ref = "gold")
x$corpus <- relevel(as.factor(x$corpus), ref = "buckeye")
x$measure <- relevel(as.factor(x$measure), ref = "understands")

# Mixed model: age crossed with log smoothed frequency, plus the four-way
# interaction of log frequency with algorithm-unit, corpus and measure;
# random intercept, age slope and log-frequency slope per uni_lemma.
m4 <- lmer(
  prop ~ age * log(freq_smoothed) + log(freq_smoothed):au:corpus:measure +
    (1 + age + log(freq_smoothed) | uni_lemma),
  data = x
)
summary(m4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## prop ~ age * log(freq_smoothed) + log(freq_smoothed):au:corpus:measure +  
##     (1 + age + log(freq_smoothed) | uni_lemma)
##    Data: x
## 
## REML criterion at convergence: -103058.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9019 -0.7063 -0.0639  0.5986  4.1477 
## 
## Random effects:
##  Groups    Name               Variance  Std.Dev.  Corr       
##  uni_lemma (Intercept)        1.749e-02 0.1322546            
##            age                7.102e-05 0.0084271 -0.54      
##            log(freq_smoothed) 1.129e-08 0.0001062  0.62  0.32
##  Residual                     1.073e-02 0.1035820            
## Number of obs: 62688, groups:  uni_lemma, 336
## 
## Fixed effects:
##                                                                                        Estimate
## (Intercept)                                                                          -3.275e-01
## age                                                                                   3.809e-02
## log(freq_smoothed)                                                                    1.995e-02
## age:log(freq_smoothed)                                                               -6.462e-04
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                           -2.440e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands    -2.500e-02
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             -2.573e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands      -2.626e-02
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        -2.628e-02
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands -2.693e-02
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                              -1.284e-03
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces       -9.831e-04
## log(freq_smoothed):augold:corpusbrent:measureproduces                                -5.804e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces         -3.103e-04
## log(freq_smoothed):augold:corpusprovidence:measureproduces                           -3.289e-04
##                                                                                      Std. Error
## (Intercept)                                                                           1.214e-02
## age                                                                                   6.952e-04
## log(freq_smoothed)                                                                    1.146e-03
## age:log(freq_smoothed)                                                                6.029e-05
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            2.696e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     2.692e-04
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              2.572e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       2.630e-04
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         2.559e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  2.626e-04
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               2.249e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        2.209e-04
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 2.045e-04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          2.070e-04
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            2.033e-04
##                                                                                              df
## (Intercept)                                                                           1.808e+03
## age                                                                                   1.545e+03
## log(freq_smoothed)                                                                    4.813e+04
## age:log(freq_smoothed)                                                                4.964e+04
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            6.211e+04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     6.217e+04
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              6.199e+04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       6.204e+04
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         6.201e+04
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  6.207e+04
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                               6.197e+04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces        6.212e+04
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 6.201e+04
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          6.201e+04
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            6.201e+04
##                                                                                       t value
## (Intercept)                                                                           -26.985
## age                                                                                    54.783
## log(freq_smoothed)                                                                     17.413
## age:log(freq_smoothed)                                                                -10.719
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            -90.496
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     -92.844
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             -100.029
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       -99.871
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        -102.682
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands -102.559
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                                -5.710
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces         -4.451
## log(freq_smoothed):augold:corpusbrent:measureproduces                                  -2.838
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces           -1.499
## log(freq_smoothed):augold:corpusprovidence:measureproduces                             -1.618
##                                                                                      Pr(>|t|)
## (Intercept)                                                                           < 2e-16
## age                                                                                   < 2e-16
## log(freq_smoothed)                                                                    < 2e-16
## age:log(freq_smoothed)                                                                < 2e-16
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                            < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands     < 2e-16
## log(freq_smoothed):augold:corpusbrent:measureunderstands                              < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands       < 2e-16
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                         < 2e-16
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands  < 2e-16
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                              1.13e-08
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces       8.57e-06
## log(freq_smoothed):augold:corpusbrent:measureproduces                                 0.00454
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces          0.13394
## log(freq_smoothed):augold:corpusprovidence:measureproduces                            0.10562
##                                                                                         
## (Intercept)                                                                          ***
## age                                                                                  ***
## log(freq_smoothed)                                                                   ***
## age:log(freq_smoothed)                                                               ***
## log(freq_smoothed):augold:corpusbuckeye:measureunderstands                           ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureunderstands    ***
## log(freq_smoothed):augold:corpusbrent:measureunderstands                             ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureunderstands      ***
## log(freq_smoothed):augold:corpusprovidence:measureunderstands                        ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusprovidence:measureunderstands ***
## log(freq_smoothed):augold:corpusbuckeye:measureproduces                              ***
## log(freq_smoothed):autp/relativeforward/syllable:corpusbuckeye:measureproduces       ***
## log(freq_smoothed):augold:corpusbrent:measureproduces                                ** 
## log(freq_smoothed):autp/relativeforward/syllable:corpusbrent:measureproduces            
## log(freq_smoothed):augold:corpusprovidence:measureproduces                              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

Checking model assumptions: argh…

# Diagnostic plots (type = "diag") for model m0.
plot_model(m0, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Diagnostic plots (type = "diag") for model m1.
plot_model(m1, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Diagnostic plots (type = "diag") for model m2.
plot_model(m2, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Diagnostic plots (type = "diag") for the mixed model m3.
plot_model(m3, type = "diag")
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]